*-------------------------------------------------------------------------------
* Set up
*-------------------------------------------------------------------------------

* Set $root 
project figstabs, root
if (r(buildrunning)==0) include "${root}/code/config_interactive.do"

* Set globals
project, uses("${root}/code/set_globals.do")
include "${root}/code/set_globals.do"

*  Create directories
cap mkdir "${root}/results"
cap mkdir "${root}/results/paper numbers"
cap mkdir "${root}/results/paper numbers/Zearn"

*-------------------------------------------------------------------------------
* Import Zearn data
*-------------------------------------------------------------------------------

project, uses("${root}/data/derived/Zearn/intermediate/zearn_table_data.csv")
import delimited "${root}/data/derived/Zearn/intermediate/zearn_table_data.csv", clear

* Keep one observation of Zearn data per cz
drop if date != "2020-01-27"

* Merge covariates
rename zcta5 zcta
project, uses("${root}/data/derived/ACS 2014-2018 5-Year ZCTA/ACS 2014-2018 ZCTA.dta")
merge m:1 zcta using "${root}/data/derived/ACS 2014-2018 5-Year ZCTA/ACS 2014-2018 ZCTA.dta", keep(1 2 3) keepusing(medhhinc_2014_2018_est pop_2014_2018_est)

* Merge on school level demographics back onto additional zctas
rename (school_id) (mdrschoolsmdrschoolid) 
project, uses("${root}/data/derived/Zearn/intermediate/School demos_6.1.20_Added detail.dta")
merge m:1 mdrschoolsmdrschoolid using "${root}/data/derived/Zearn/intermediate/School demos_6.1.20_Added detail.dta", keep(1 2 3) nogen
rename (mdrschoolsmdrschoolid) (school_id)

* Number of students and schools in population using class data
count if ((!mi(mdrschoolsallgrades)) & (coverage_zip == 1))
local n_schools_Z = `r(N)'
di `n_schools_Z'

total(mdrschoolsallgrades) if coverage_zip == 1
local n_students_Z = _b[mdrschoolsallgrades]
di `n_students_Z'

* Number of students and schools in population using school demos data
count if !mi(mdrschoolsallgrades)
local n_schools_pop = `r(N)'
di `n_schools_pop'

total(mdrschoolsallgrades)
local n_students_pop = _b[mdrschoolsallgrades]
di `n_students_pop'

rename (mdrschoolsamericanindianalaskan) (mdrschoolsaia)
rename (mdrschoolsasianpacificislander) (mdrschoolsapi)

* Some obs are just zip demographics - some are just schools without Zearn data
count 
count if mi(students)

count if mi(pop_2014_2018_est)
* POPULATION: Store values for median household income
foreach x in 25 50 75 {
	foreach y in medhhinc_2014_2018_est {
		
		* Store Zearn weighted percentiles
		qui sum `y' [w=students] if coverage_zip == 1, d
		local `y'_`x'_Z = round(`r(p`x')', 0.0001)
		
		* Store population weighted percentiles
		qui sum `y' [w=pop_2014_2018_est], d
		local `y'_`x'_pop = round(`r(p`x')', 0.0001)
	}
}

* Get number of people and number of Zip codes
preserve
gcollapse (sum) students (mean) medhhinc_2014_2018_est pop_2014_2018_est (max) coverage_zip, by(zcta)

count if !mi(zcta) 
local n_zipcodes_pop = `r(N)'
di `n_zipcodes_pop'

total(pop_2014_2018_est)
local n_people_pop = _b[pop_2014_2018_est]
di `n_people_pop'

count if students > 0 & coverage_zip == 1
local n_zipcodes_Z = `r(N)'
di `n_zipcodes_Z'

total students if coverage_zip == 1
local n_people_Z = _b[students]
di `n_people_Z'
restore

* SCHOOL-LEVEL: FRPL and race shares

* FRPL shares
replace mdrschoolsfrl = 0 if mi(mdrschoolsfrl)
gen frpl_share = (mdrschoolsfrl / mdrschoolsallgrades)
sum frpl_share [w=students], d

* Store values for MDR (school-level) share black, share hispanic, FRPL
foreach x in 25 50 75 {
	foreach y in black_share hispanic_share aia_share api_share white_share frl_share{
		
		* Store percentiles among Zearn schools, weighting by base student counts (~9k of them)
		qui sum `y' [w=students] if coverage_zip == 1, d
		local `y'_`x'_Z = round(`r(p`x')', 0.0001)
		
		* Store percentiles among all schools, weighting by student counts (~120k of them)
		qui sum `y' [w=mdrschoolsallgrades], d
		local `y'_`x'_pop = round(`r(p`x')', 0.0001)
	}
}

* Display values
foreach y in medhhinc_2014_2018_est black_share hispanic_share aia_share api_share white_share frl_share { 
	foreach x in 25 50 75 {
		noi di "Zearn `y' p`x': ``y'_`x'_Z'"
	}
	noi di "  "
	foreach x in 25 50 75 {
		noi di "Population `y' p`x': ``y'_`x'_pop'"
	}
	noi di "  "
	noi di "  "
}

*-------------------------------------------------------------------------------
* Create table
*-------------------------------------------------------------------------------

clear
set obs 4

* Initialize variable row
gen var = ""
local i = 1

foreach y in medhhinc_2014_2018_est black_share hispanic_share frl_share { 
	replace var = "`y'" if _n == `i'
	local i = `i' + 1
}

* Fill dataset with values
foreach z in Z pop{
	foreach x in 25 50 75{
		gen `z'_`x' 	= .
	}
	foreach y in medhhinc_2014_2018_est black_share hispanic_share frl_share{ 
		foreach x in 25 50 75{
			replace `z'_`x' = ``y'_`x'_`z'' if var == "`y'"
		}
	}
}

reshape long Z_ pop_, i(var) j(pctile)
sort var pctile

local obs = _N + 4
set obs `obs'

replace var = "n_schools" if _n == 13
replace Z_ = `n_schools_Z' if _n == 13
replace pop_ = `n_schools_pop' if _n == 13
replace var = "n_students" if _n == 14
replace Z_ = `n_students_Z' if _n == 14
replace pop_ = `n_students_pop' if _n == 14

replace var = "n_zipcodes" if _n == 15
replace Z_ = `n_zipcodes_Z' if _n == 15
replace pop_ = `n_zipcodes_pop' if _n == 15
replace var = "n_people" if _n == 16
replace Z_ = `n_people_Z' if _n == 16
replace pop_ = `n_people_pop' if _n == 16

* Export
export excel "${root}/results/new_app_table_8_a_b.xlsx", sheet(new_app_table_8_a_b, replace)
project, creates("${root}/results/new_app_table_8_a_b.xlsx")

*-------------------------------------------------------------------------------
* Save number of students for paper
*-------------------------------------------------------------------------------

local n_people_Z_r = `=round(`n_people_Z'/10000, 1)'*10000

display `n_people_Z_r'

cap erase "${root}/results/paper numbers/Zearn/Number of students.yaml"

yamlout using "${root}/results/paper numbers/Zearn/Number of students.yaml", ///
		key("n_students") ///
		comment("number of students") ///
		value(`n_people_Z_r') fmt(%15.0fc)
		
